ZhongXu has asked that I make him the HODs from the SHAMs I made him. This should be pretty straightforward so I'll do it quickly here.


In [1]:
import numpy as np
import astropy
from pearce.mocks import cat_dict
from pearce.mocks.assembias_models.table_utils import compute_prim_haloprop_bins
import h5py

In [2]:
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
from itertools import cycle

In [3]:
%%bash
ls /home/users/swmclau2/scratch/*mpeak*.hdf5 -ltr


-rw-r--r-- 1 swmclau2 kipac   28562856 Dec 18  2017 /home/users/swmclau2/scratch/catalog_ab_halo_vmax@mpeak_shuffled.hdf5
-rw-r--r-- 1 swmclau2 kipac   35284192 Feb  8 11:36 /home/users/swmclau2/scratch/catalog_ab_halo_vmax@mpeak.hdf5
-rw-r--r-- 1 swmclau2 kipac   35284192 Feb  8 13:29 /home/users/swmclau2/scratch/catalog_ab_halo_mpeak.hdf5
-rw-r--r-- 1 swmclau2 kipac 8061416824 Feb 12 14:57 /home/users/swmclau2/scratch/catalog_ab_halo_mpeak_large.hdf5
-rw-r--r-- 1 swmclau2 kipac   28564192 Feb 12 16:13 /home/users/swmclau2/scratch/catalog_ab_halo_mpeak_shuffled.hdf5

In [4]:
from collections import Counter
def compute_occupations(halo_catalog, galaxy_catalog):
    #halo_table = cat.halocat.halo_table[cat.halocat.halo_table['halo_mvir'] > min_ptcl*cat.pmass]

    cens_occ = np.zeros((np.sum(halo_catalog['halo_upid'] == -1),))
    #cens_occ = np.zeros((len(halo_table),))
    sats_occ = np.zeros_like(cens_occ)
    detected_central_ids = set(galaxy_catalog[galaxy_catalog['halo_upid']==-1]['halo_id'])
    detected_satellite_upids = Counter(galaxy_catalog[galaxy_catalog['halo_upid']!=-1]['halo_upid'])

    for idx, row  in enumerate(halo_catalog[halo_catalog['halo_upid'] == -1]):
        if idx%1000000 == 0:
            print idx
            
        cens_occ[idx] = 1.0 if row['halo_id'] in detected_central_ids else 0.0
        sats_occ[idx]+= detected_satellite_upids[row['halo_id']]

    return cens_occ, sats_occ

In [5]:
from math import ceil
def compute_mass_bins(prim_haloprop, dlog10_prim_haloprop=0.05):   
    lg10_min_prim_haloprop = np.log10(np.min(prim_haloprop))-0.001
    lg10_max_prim_haloprop = np.log10(np.max(prim_haloprop))+0.001
    num_prim_haloprop_bins = (lg10_max_prim_haloprop-lg10_min_prim_haloprop)/dlog10_prim_haloprop
    return np.logspace(
        lg10_min_prim_haloprop, lg10_max_prim_haloprop,
        num=int(ceil(num_prim_haloprop_bins)))

In [6]:
from glob import glob
#hdf5_files = glob('../*catalog_ab_halo*fixed*.hdf5')
scratch_path = '/home/users/swmclau2/scratch/'
hdf5_files = ['catalog_ab_halo_vmax@mpeak.hdf5','catalog_ab_halo_mpeak.hdf5', 'catalog_ab_halo_mpeak_shuffled.hdf5']
for fname in hdf5_files:
    f = h5py.File(scratch_path+ fname, "r")
    print fname
    print len(f.keys())
    print '*'*50


catalog_ab_halo_vmax@mpeak.hdf5
1
**************************************************
catalog_ab_halo_mpeak.hdf5
1
**************************************************
catalog_ab_halo_mpeak_shuffled.hdf5
1
**************************************************

In [ ]:
paths = ['halo_vmax@mpeak_catalog', 'halo_mpeak_catalog', 'halo_mpeak_shuffled']

In [ ]:
Lbox = 1000.0
#catalog = np.loadtxt('ab_sham_hod_data_cut.npy')
sim = ''
cen_hods = []
sat_hods = []
halo_catalog = astropy.table.Table.read(scratch_path+'catalog_ab_%s_large.hdf5'%('halo_mpeak'), format = 'hdf5')
    
mass_bins = compute_mass_bins(halo_catalog['halo_mvir'], 0.2)
mass_bin_centers = (mass_bins[1:]+mass_bins[:-1])/2.0
simname = 'ds_' if sim=='' else sim
np.savetxt(simname+'mass_bins.npy', mass_bins)

for fname, path in zip(hdf5_files, paths):
    print path
    
    #for ab_property in ('halo_vmax@mpeak', 'halo_mpeak'):
        
    #   for shuffle in (False, True):
            
    #        if shuffle and ab_property=='halo_vmax@mpeak':
    #            continue
    #        if not shuffle:
    #            galaxy_catalog = astropy.table.Table.read('../%scatalog_ab_%s_fixed.hdf5'%(sim,ab_property), format = 'hdf5',
    #                                                     path = '%scatalog_ab_%s.hdf5'%(sim,ab_property))
    #        else:
    galaxy_catalog = astropy.table.Table.read(scratch_path + fname, path = path,  format = 'hdf5')

    cens_occ, sats_occ = compute_occupations(halo_catalog, galaxy_catalog)

    host_halo_mass = halo_catalog[ halo_catalog['halo_upid']==-1]['halo_mvir']
    host_halo_masses.append(host_halo_mass)

    cenmask = galaxy_catalog['halo_upid']==-1
    satmask = galaxy_catalog['halo_upid']>0

    halo_mass = halo_catalog['halo_mvir']

    cen_hod = hod_from_mock(galaxy_catalog['halo_mvir_host_halo'][cenmask], halo_mass, mass_bins)[0]
    sat_hod = hod_from_mock(galaxy_catalog['halo_mvir_host_halo'][satmask], halo_mass, mass_bins)[0]
    
    cen_hods.append(cen_hod)
    sat_hods.append(sat_hod)

    #if not shuffle:
    #    np.savetxt('%scatalog_ab_%s_cen_hod.npy'%(sim,ab_property), cen_hod)
    #    np.savetxt('%scatalog_ab_%s_sat_hod.npy'%(sim,ab_property),sat_hod)
    #else:
    #    np.savetxt('%scatalog_ab_%s_shuffled_cen_hod.npy'%(sim,ab_property), cen_hod)
    #    np.savetxt('%scatalog_ab_%s_shuffled_sat_hod.npy'%(sim,ab_property),sat_hod)


WARNING: path= was not specified but multiple tables are present, reading in first available table (path=halo_mpeak_catalog) [astropy.io.misc.hdf5]
halo_vmax@mpeak_catalog
0
1000000
2000000
3000000
4000000
5000000
6000000
7000000
8000000
9000000
10000000
11000000
12000000
13000000
14000000
15000000
16000000
17000000
18000000
19000000
20000000
21000000
22000000
23000000
24000000
25000000
26000000
27000000
28000000
29000000
30000000
31000000
32000000
33000000
34000000
35000000
36000000
37000000
38000000
39000000
40000000
41000000
42000000
43000000
44000000
45000000
46000000
47000000
48000000
49000000
50000000
51000000
52000000
53000000
54000000
55000000
56000000
57000000
58000000
59000000
60000000
61000000
62000000
63000000
64000000
65000000
66000000
67000000
68000000
69000000
70000000
71000000
72000000
73000000
74000000

In [ ]:
for cen_hod in cen_hods:
    plt.plot(mass_bin_centers, cen_hod)
plt.loglog();

In [ ]:
for sat_hod in sat_hods:
    plt.plot(mass_bin_centers, sat_hod)
plt.loglog();

In [ ]:
from itertools import izip
for cen_hod, sat_hod in izip(cen_hods, sat_hods):
    plt.plot(mass_bin_centers, cen_hod+sat_hod)
plt.loglog();

In [ ]:
plt.hist(halocats['chinchilla']['halo_mvir'], bins = mass_bins, label = 'Chinchilla')
plt.hist(halocats['aardvark']['halo_mvir'], bins = mass_bins, alpha = 0.3, label = 'Aardvark');
plt.legend(loc = 'best')
plt.yscale('log')
plt.xscale('log')
#plt.loglog()

In [ ]:
rbins = np.logspace(-1, 1.7, 15)
rpoints = (rbins[1:]+rbins[:-1])/2

In [ ]:
nd = 1e-3
n_obj_needed = int(nd*(cat.Lbox**3))
halo_clusterings = {}
for simname, halocat in halocats.iteritems():
    sort_idxs = np.argsort(halocat['halo_mvir'])
    tmp_halocat = halocat[sort_idxs[-1*n_obj_needed:]]
    
    print np.min(tmp_halocat['halo_mvir'])
    
    pos = np.c_[tmp_halocat['halo_x'], tmp_halocat['halo_y'], tmp_halocat['halo_z']]
    halo_clusterings[simname] = tpcf(pos, rbins, period=cat.Lbox)

In [ ]:
for simname in simnames:
    plt.plot(rpoints, halo_clusterings[simname], label = simname)

plt.loglog();
plt.legend(loc='best')
plt.xlabel('r [Mpc]')
plt.ylabel('xi Halo, nd = %.2e'%nd)

In [ ]:
pos = np.c_[galcats['chinchilla']['halo_x'], galcats['chinchilla']['halo_y'], galcats['chinchilla']['halo_z']]
chin_xi = tpcf(pos, rbins, period=cat.Lbox)

In [ ]:
pos = np.c_[galcats['aardvark']['halo_x'], galcats['aardvark']['halo_y'], galcats['aardvark']['halo_z']]
aard_xi = tpcf(pos, rbins, period=cat.Lbox)

In [ ]:
plt.plot(rpoints, chin_xi, label = 'Chinchilla')
plt.plot(rpoints, aard_xi, label = 'Aardvark')

plt.loglog();
plt.legend(loc='best')
plt.xlabel('r [Mpc]')
plt.ylabel('xi')

In [ ]:
plt.plot(rpoints, chin_xi/halo_clusterings['chinchilla'], label = 'Chinchilla')
plt.plot(rpoints, aard_xi/halo_clusterings['aardvark'], label = 'Aardvark')

#plt.loglog();
plt.xscale('log')
plt.legend(loc='best')
plt.xlabel('r [Mpc]')
plt.ylabel('xi')

In [ ]:
plt.plot(rpoints, chin_xi/aard_xi, label = 'Chinchilla/Aardvark')
#plt.plot(rpoints, aard_xi, label = 'Aardvark')

#plt.loglog();
plt.xscale('log')
plt.legend(loc='best')
plt.xlabel('r [Mpc]')
plt.ylabel('xi')

In [ ]:
conc_bins = np.linspace(0, 1, 10)#np.linspace(0, 1000, 10)#np.linspace(0, 22, 10)

In [ ]:
#sns.set_palette(sns.diverging_palette(255, 133, l=60, n=22, center="light"))
colors =sns.cubehelix_palette(22,start=.5, rot=-.75)

In [ ]:
i = 0
fig = plt.figure(figsize = ((10,8)))

for simname, host_halo_mass, cens_occ, sats_occ, ls  in izip(simnames, host_halo_masses, cens_occs, sats_occs, ['-', '--']):
    mass_bin_idxs = compute_prim_haloprop_bins(prim_haloprop_bin_boundaries=mass_bins, prim_haloprop =host_halo_mass)

    host_halo_conc =halocats[simname][ halocats[simname]['halo_upid']==-1]['halo_nfw_conc']

    conditional_conc_percentiles = compute_conditional_percentiles(prim_haloprop  = host_halo_mass,\
                                                                  sec_haloprop = host_halo_conc,\
                                                                  prim_haloprop_bin_boundaries = mass_bins)

    mass_bin_nos = range(1,21,1)
    for bin_no, c in zip(mass_bin_nos, colors):
        bin_center = np.mean(mass_bins[bin_no:bin_no+2])
        indices_of_mb = np.where(mass_bin_idxs == bin_no)[0]
        cens_avg, sats_avg = np.mean(cens_occ[indices_of_mb]), np.mean(sats_occ[indices_of_mb])

        #med_conc = np.median([indices_of_mb, 5])
        med_conc = 0.5

        (binned_cens, c_bins,_), (binned_sats,_,_) = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
                                                                      cens_occ[indices_of_mb],bins=conc_bins), \
                                   binned_statistic(conditional_conc_percentiles[indices_of_mb],\
                                                                      sats_occ[indices_of_mb],bins=conc_bins)

        cen_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
                                                cens_occ[indices_of_mb], bins = conc_bins, statistic='sum')
        sat_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
                                                sats_occ[indices_of_mb], bins = conc_bins, statistic='sum')

        c_bin_centers = (c_bins[1:]+c_bins[:-1])/2
        #plt.plot(c_bin_centers,(binned_sats-sats_avg), color = c,lw=2.5, label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center) )
        #plt.errorbar(c_bin_centers,(binned_sats-sats_avg), yerr=np.sqrt(binned_sats/sat_bin_counts),color = c,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center))
        if i >0:
            plt.plot(c_bin_centers,(binned_cens-cens_avg),color = c, ls = ls)
        else:
            plt.plot(c_bin_centers,(binned_cens-cens_avg),color = c,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center), ls = ls)
    i+=1
        
#plt.xscale('log')
plt.legend(loc='best')
#plt.xlim([0,25])
plt.ylim([-0.1,1.1])
#plt.xlim([-0.05, 1.2])
#plt.yscale('log')

plt.title(r"$N(c)$ distribution in fixed mass bins, Vpeak SHAM")
plt.show()

In [ ]:
i = 0
fig = plt.figure(figsize = ((10,8)))

for simname, host_halo_mass, cens_occ, sats_occ, ls  in izip(simnames, host_halo_masses, cens_occs, sats_occs, ['-', '--']):
    mass_bin_idxs = compute_prim_haloprop_bins(prim_haloprop_bin_boundaries=mass_bins, prim_haloprop =host_halo_mass)

    host_halo_conc =halocats[simname][ halocats[simname]['halo_upid']==-1]['halo_nfw_conc']

    conditional_conc_percentiles = compute_conditional_percentiles(prim_haloprop  = host_halo_mass,\
                                                                  sec_haloprop = host_halo_conc,\
                                                                  prim_haloprop_bin_boundaries = mass_bins)

    mass_bin_nos = range(1,21,1)
    for bin_no, c in zip(mass_bin_nos, colors):
        bin_center = np.mean(mass_bins[bin_no:bin_no+2])
        indices_of_mb = np.where(mass_bin_idxs == bin_no)[0]
        cens_avg, sats_avg = np.mean(cens_occ[indices_of_mb]), np.mean(sats_occ[indices_of_mb])

        #med_conc = np.median([indices_of_mb, 5])
        med_conc = 0.5

        (binned_cens, c_bins,_), (binned_sats,_,_) = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
                                                                      cens_occ[indices_of_mb],bins=conc_bins), \
                                   binned_statistic(conditional_conc_percentiles[indices_of_mb],\
                                                                      sats_occ[indices_of_mb],bins=conc_bins)

        cen_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
                                                cens_occ[indices_of_mb], bins = conc_bins, statistic='sum')
        sat_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
                                                sats_occ[indices_of_mb], bins = conc_bins, statistic='sum')

        c_bin_centers = (c_bins[1:]+c_bins[:-1])/2
        #plt.plot(c_bin_centers,(binned_sats-sats_avg), color = c,lw=2.5, label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center) )
        #plt.errorbar(c_bin_centers,(binned_sats-sats_avg), yerr=np.sqrt(binned_sats/sat_bin_counts),color = c,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center))
        if i >0:
            plt.plot(c_bin_centers,(binned_cens),color = c, ls = ls)
        else:
            plt.plot(c_bin_centers,(binned_cens),color = c,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center), ls = ls)
    i+=1
        
#plt.xscale('log')
plt.legend(loc='best')
#plt.xlim([0,25])
plt.ylim([-0.1,1.1])
plt.xlim([-0.05, 1.2])
#plt.yscale('log')

plt.title(r"$N(c)$ distribution in fixed mass bins, Vpeak SHAM")
plt.show()
mass_bin_nos = range(1,13,1) fig = plt.figure(figsize = ((10,8))) for bin_no, c in zip(mass_bin_nos, colors): bin_center = np.mean(mass_bins[bin_no:bin_no+2]) indices_of_mb = np.where(mass_bin_idxs == bin_no)[0] cens_avg, sats_avg = np.mean(cens_occ[indices_of_mb]), np.mean(sats_occ[indices_of_mb]) #med_conc = np.median([indices_of_mb, 5]) med_conc = 0.5 (binned_cens, c_bins,_), (binned_sats,_,_) = binned_statistic(conditional_conc_percentiles[indices_of_mb],\ cens_occ[indices_of_mb],bins=conc_bins), \ binned_statistic(conditional_conc_percentiles[indices_of_mb],\ sats_occ[indices_of_mb],bins=conc_bins) cen_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\ cens_occ[indices_of_mb], bins = conc_bins, statistic='sum') sat_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\ sats_occ[indices_of_mb], bins = conc_bins, statistic='sum') c_bin_centers = (c_bins[1:]+c_bins[:-1])/2 #plt.plot(c_bin_centers,(binned_sats-sats_avg), color = c,lw=2.5, label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center) ) #plt.errorbar(c_bin_centers,(binned_sats-sats_avg), yerr=np.sqrt(binned_sats/sat_bin_counts),color = c,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center)) plt.plot(c_bin_centers,(binned_cens),color = c,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center)) #plt.xscale('log') plt.legend(loc='best') #plt.xlim([0,25]) plt.ylim([1e-5,1.1]) plt.xlim([-0.05, 1.2]) plt.yscale('log') plt.title(r"$N(c)$ distribution in fixed mass bins, Vpeak SHAM") plt.show()

In [ ]:
from collections import defaultdict
mass_bin_nos = range(1,21,1)
binned_censes = defaultdict(list)
cens_avgs = defaultdict(list)
bin_centers = []
for bin_no, c in zip(mass_bin_nos, colors):
    for simname, host_halo_mass, cens_occ, sats_occ, ls  in izip(simnames, host_halo_masses, cens_occs, sats_occs, ['-', '--']):
        mass_bin_idxs = compute_prim_haloprop_bins(prim_haloprop_bin_boundaries=mass_bins, prim_haloprop =host_halo_mass)

        host_halo_conc =halocats[simname][ halocats[simname]['halo_upid']==-1]['halo_nfw_conc']

        conditional_conc_percentiles = compute_conditional_percentiles(prim_haloprop  = host_halo_mass,\
                                                                      sec_haloprop = host_halo_conc,\
                                                                      prim_haloprop_bin_boundaries = mass_bins)

        bin_center = np.mean(mass_bins[bin_no:bin_no+2])
        if simname == 'chinchilla':
            bin_centers.append(bin_center)
            
        indices_of_mb = np.where(mass_bin_idxs == bin_no)[0]
        cens_avg, sats_avg = np.mean(cens_occ[indices_of_mb]), np.mean(sats_occ[indices_of_mb])
        
        cens_avgs[simname].append(cens_avg)

        #med_conc = np.median([indices_of_mb, 5])
        med_conc = 0.5

        (binned_cens, c_bins,_), (binned_sats,_,_) = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
                                                                      cens_occ[indices_of_mb],bins=conc_bins), \
                                   binned_statistic(conditional_conc_percentiles[indices_of_mb],\
                                                                      sats_occ[indices_of_mb],bins=conc_bins)
            
        binned_censes[simname].append(binned_cens)

        cen_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
                                                cens_occ[indices_of_mb], bins = conc_bins, statistic='sum')
        sat_bin_counts, _, _ = binned_statistic(conditional_conc_percentiles[indices_of_mb],\
                                                sats_occ[indices_of_mb], bins = conc_bins, statistic='sum')

        c_bin_centers = (c_bins[1:]+c_bins[:-1])/2
        #plt.plot(c_bin_centers,(binned_sats-sats_avg), color = c,lw=2.5, label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center) )
        #plt.errorbar(c_bin_centers,(binned_sats-sats_avg), yerr=np.sqrt(binned_sats/sat_bin_counts),color = c,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center))

In [ ]:
for binned_cens_chin, binned_cens_aard, cens_avg_chin, cens_avg_aard, bin_center, c in izip(binned_censes['chinchilla'],\
                                                                    binned_censes['aardvark'],\
                                                                    cens_avgs['chinchilla'],\
                                                                    cens_avgs['aardvark'],\
                                                                    bin_centers, 
                                                                    colors):
    
    print binned_cens_chin
    print cens_avg_chin
    plt.plot(c_bin_centers,(binned_cens_chin-cens_avg_chin),color = c)#,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center))
    plt.plot(c_bin_centers,(binned_cens_aard-cens_avg_aard),color = c, ls = '--')
    #plt.xscale('log')
    plt.legend(loc='best')
    #plt.xlim([0,25])
    #plt.ylim([-0.1,1.1])
    plt.xlim([-0.05, 1.2])
    #plt.yscale('log')

    plt.title(r"$N(c|M)$ Chinchilla vs. Aardvark in %.1f $\log M_{\odot}$, Vpeak SHAM, "%np.log10(bin_center))
    plt.show()

In [ ]:
for binned_cens_chin, binned_cens_aard, cens_avg_chin, cens_avg_aard, bin_center, c in izip(binned_censes['chinchilla'],\
                                                                    binned_censes['aardvark'],\
                                                                    cens_avgs['chinchilla'],\
                                                                    cens_avgs['aardvark'],\
                                                                    bin_centers, 
                                                                    colors):
    plt.plot(c_bin_centers,(binned_cens_chin-cens_avg_chin)/(binned_cens_aard-cens_avg_aard),color = c)#,label = r"%.1f $\log M_{\odot}$"%np.log10(bin_center))

    #plt.xscale('log')
    plt.legend(loc='best')
    #plt.xlim([0,25])
    #plt.ylim([-0.1,1.1])
    plt.xlim([-0.05, 1.2])
    #plt.yscale('log')

    plt.title(r"$N(c|M)$ Chinchilla/Aardvark in %.1f $\log M_{\odot}$, Vpeak SHAM, "%np.log10(bin_center))
    plt.show()

In [ ]: